In [3]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

from tensorflow import keras

In [5]:
print(tf.__version__)


1.13.1

Fashion MNIST dataset


In [6]:
fashion_mnist = keras.datasets.fashion_mnist

(train_images, train_labels),(test_images, test_labels) = fashion_mnist.load_data()

In [13]:
print("train images:",train_images.shape)
print("train labels:",len(train_labels))
print("train labels:",train_labels)

print("test images:",test_images.shape)
print("test labels:",test_labels.shape)


train images: (60000, 28, 28)
train labels count: 60000
train labels: [9 0 0 ... 3 0 5]
test images: (10000, 28, 28)
test labels: (10000,)

In [11]:
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

Preprocessing of the data


In [15]:
plt.figure()
plt.imshow(train_images[0])
plt.colorbar()
plt.show()



In [16]:
### scale the pixel values from the 0-255 range down to 0-1 before feeding them to the network

train_images = train_images / 255.0
test_images = test_images / 255.0
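
As a quick sanity check (this snippet is an addition for illustration, not part of the original run), the pixel values should now lie between 0 and 1:

# illustrative check: confirm the scaled value range
print(train_images.min(), train_images.max())  # expected: 0.0 1.0
print(test_images.min(), test_images.max())    # expected: 0.0 1.0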

In [19]:
### display the first 25 scaled training images with their class names

plt.figure(figsize=(10,10))
for i in range(25):
    plt.subplot(5,5,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(train_images[i],cmap=plt.cm.binary)
    plt.xlabel(class_names[train_labels[i]])
plt.show()


Building the Model

Building the neural network requires configuring the layers of the model, then compiling the model.


In [21]:
### 1. Set up the layers

model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),       ## flatten each 28x28 image into a 1-D vector of 784 values
    keras.layers.Dense(128, activation=tf.nn.relu),    ## densely (fully) connected layer with 128 neurons
    keras.layers.Dense(10, activation=tf.nn.softmax)   ## softmax output layer: one probability for each of the 10 classes
])


WARNING:tensorflow:From /anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/resource_variable_ops.py:435: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.
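
A convenient way to confirm the layer shapes described in the comments above is model.summary(); this call is not in the original notebook but is standard Keras:

# illustrative: inspect layer output shapes and parameter counts
model.summary()
# Flatten -> (None, 784), Dense -> (None, 128), Dense -> (None, 10)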

In [22]:
### 2. Compile the model

model.compile(optimizer = 'adam',loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])
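
Note that sparse_categorical_crossentropy works directly with integer labels such as train_labels. If the labels were one-hot encoded instead, the equivalent setup would use categorical_crossentropy; a minimal sketch of that alternative (an assumption for illustration, not used in this notebook):

# illustrative alternative with one-hot labels (not executed here)
one_hot_labels = keras.utils.to_categorical(train_labels, num_classes=10)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# fitting would then use one_hot_labels instead of train_labels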

Train the model


In [23]:
model.fit(train_images, train_labels, epochs=5)


Epoch 1/5
60000/60000 [==============================] - 10s 171us/sample - loss: 0.4998 - acc: 0.8238
Epoch 2/5
60000/60000 [==============================] - 10s 171us/sample - loss: 0.3776 - acc: 0.8648
Epoch 3/5
60000/60000 [==============================] - 10s 173us/sample - loss: 0.3377 - acc: 0.8763
Epoch 4/5
60000/60000 [==============================] - 10s 173us/sample - loss: 0.3146 - acc: 0.8854
Epoch 5/5
60000/60000 [==============================] - 10s 173us/sample - loss: 0.2957 - acc: 0.8908
Out[23]:
<tensorflow.python.keras.callbacks.History at 0xb30452c18>

Evaluate accuracy


In [24]:
test_loss, test_acc = model.evaluate(test_images, test_labels)

print("Test Accuracy is: ",test_acc)
print("Test Loss is: ",test_loss)


10000/10000 [==============================] - 0s 50us/sample - loss: 0.3458 - acc: 0.8737
Test Accuracy is:  0.8737
Test Loss is:  0.34579968147277834

It turns out that the accuracy on the test dataset is a little lower than the accuracy on the training dataset. This gap between training accuracy and test accuracy is an example of overfitting: the model performs worse on new data than on its training data.
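
One common way to watch for this gap during training is to hold out part of the training data for validation; a minimal sketch of how that could look with tf.keras (for illustration, not part of the run above):

# illustrative: track validation accuracy next to training accuracy
history = model.fit(train_images, train_labels, epochs=5, validation_split=0.1)
print(history.history['acc'])      # training accuracy per epoch
print(history.history['val_acc'])  # validation accuracy per epoch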

Making Predictions


In [25]:
predictions = model.predict(test_images)

In [26]:
predictions[0]


Out[26]:
array([3.7975573e-05, 1.6347528e-08, 4.6933274e-06, 1.1535371e-06,
       2.3021214e-06, 1.4476281e-02, 6.3532862e-05, 1.2033720e-01,
       1.5022095e-04, 8.6492658e-01], dtype=float32)

Each prediction is an array of 10 numbers describing the model's "confidence" that the image corresponds to each of the 10 different articles of clothing.
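
Because the output layer uses softmax, each prediction row sums to (approximately) 1, and the class_names list maps indices back to articles of clothing; a small check added here for illustration:

# illustrative: probabilities sum to ~1; map the most confident index to its class name
print(np.sum(predictions[0]))                   # ~1.0
print(class_names[np.argmax(predictions[0])])   # 'Ankle boot' for this image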


In [27]:
np.argmax(predictions[0])  ## to find the maximum confidence label


Out[27]:
9

In [28]:
### helper functions to visualize predictions on the test data

def plot_image(i, predictions_array, true_label, img):
    predictions_array, true_label, img = predictions_array[i], true_label[i], img[i]
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
  
    plt.imshow(img, cmap=plt.cm.binary)
  
    predicted_label = np.argmax(predictions_array)
    if predicted_label == true_label:
        color = 'blue'
    else:
        color = 'red'
  
    plt.xlabel("{} {:2.0f}% ({})".format(class_names[predicted_label],
                                100*np.max(predictions_array),
                                class_names[true_label]),
                                color=color)

def plot_value_array(i, predictions_array, true_label):
    predictions_array, true_label = predictions_array[i], true_label[i]
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    thisplot = plt.bar(range(10), predictions_array, color="#777777")
    plt.ylim([0, 1])
    predicted_label = np.argmax(predictions_array)
  
    thisplot[predicted_label].set_color('red')
    thisplot[true_label].set_color('blue')

In [29]:
### plotting the first test data

i = 0
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(i, predictions, test_labels, test_images)
plt.subplot(1,2,2)
plot_value_array(i, predictions,  test_labels)
plt.show()



In [30]:
i = 12
plt.figure(figsize=(6,3))
plt.subplot(1,2,1)
plot_image(i, predictions, test_labels, test_images)
plt.subplot(1,2,2)
plot_value_array(i, predictions,  test_labels)
plt.show()



In [31]:
# Plot the first num_images test images, their predicted labels, and the true labels
# Color correct predictions in blue, incorrect predictions in red
num_rows = 5
num_cols = 3
num_images = num_rows*num_cols
plt.figure(figsize=(2*2*num_cols, 2*num_rows))
for i in range(num_images):
    plt.subplot(num_rows, 2*num_cols, 2*i+1)
    plot_image(i, predictions, test_labels, test_images)
    plt.subplot(num_rows, 2*num_cols, 2*i+2)
    plot_value_array(i, predictions, test_labels)
plt.show()


Predicting a single test image


In [32]:
# Grab an image from the test dataset
img = test_images[0]

print(img.shape)

# Add the image to a batch where it's the only member.
img = (np.expand_dims(img,0))

print(img.shape)


(28, 28)
(1, 28, 28)
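
tf.keras models expect a batch dimension even when predicting on a single example; slicing with a range is an equivalent way to keep that axis (an alternative sketch, not from the original notebook):

# illustrative alternative: slicing preserves the batch axis
img_batch = test_images[0:1]
print(img_batch.shape)  # (1, 28, 28)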

In [33]:
predictions_single = model.predict(img)

print(predictions_single)


[[3.7975573e-05 1.6347528e-08 4.6933274e-06 1.1535350e-06 2.3021214e-06
  1.4476281e-02 6.3532862e-05 1.2033725e-01 1.5022095e-04 8.6492658e-01]]

In [34]:
plot_value_array(0, predictions_single, test_labels)
plt.xticks(range(10), class_names, rotation=45)
plt.show()



In [35]:
prediction_result = np.argmax(predictions_single[0])
print(prediction_result)


9